# HH Analysis
# Modified by: Mamoor Ali Khan
# Date Modified: Feb 26, 2025

###
# ----IMPORTANT NOTICE----
###
#### IF There is an error please restart R-session to eliminate any clashes.


options(scipen = 999)
# Clear data
rm(list = ls())

# Install and load pacman if not already done
if (!requireNamespace("pacman", quietly = TRUE)) {
   install.packages("pacman")
}
# pacman::p_unload(all)
pacman::p_load(
   tidyverse, estimatr, here, xtable, readxl, geosphere, extrafont, lubridate, TOSTER,
   showtext
)

# Load Arial or a fallback if Arial is not found
font_add("Arial", regular = "Arial.ttf") # or path to the Arial font on your system
showtext_auto() # Turn on showtext rendering



source("helpers.R")
###
# ----Global variables----
###

# Index ending in std has been restandardized
main_hh_outcomes <- c(
   # Incumbent evaluations index
   "incumbent_evaluations_index",
   "incumbent_evaluations_index_std",
   # Composite indices for incumbent evaluation index
   "mpa_therm",
   "mpa_party_therm",
   "mpa_vote",
   "inverse_mpa_rank",
   # Political partipation index
   "participation_index",
   "participation_index_std",
   # Composite indices for participation index
   "voted",
   "any_rally",
   "any_meeting",
   # Prospects for accountability index
   "accountability_prospects_index",
   "accountability_prospects_index_std",
   # Composite indices for accountability index
   "efficacy",
   "voting_decision_incumbent_performance_ranking_inverse",
   "n_political_conversations",
   # Global index
   "global_index",
   "global_index_std",
   # Alternative indices
   "participation_with_convos_index_std",
   "accountability_prospects_without_convos_index_std"
)
main_hh_outcomes_endline <- paste0(main_hh_outcomes, "_end")

###
# ----Ingest data----
###


ps_location <- readRDS("../1_data/3_ps_data/99_ps_location_info.rds")


hh_temp <- read_rds("../1_data/1_hh_data/hh_master.rds")



# MPA data
mpa <- read_csv("../1_data/2_mpa_data/02_mpas_with_treatment.csv")

# Pilot data
# pilot_dat <- read_csv("../1_data/ivr_pilot_wide.csv")

# Merge in competitiveness
hhw <- left_join(
   hh_temp %>%
      mutate(
         age_bin = recode(
            age_bin,
            `18 to 29` = "18-29",
            `30 to 39` = "30-39",
            `40 to 49` = "40-49",
            `50 to 59` = "50-59",
            `60+` = "60+"
         ),
         educ_bin = recode(
            educ_bin,
            `1 to 9` = "1-9",
            `10 to 13` = "10-13"
         )
      ) %>%
      mutate_at(
         vars(dist_hujra_2018, dist_district_hq),
         list(bin = ~ cut(., breaks = c(0, 10, 20, 50)))
      ) %>%
      group_by(pa_id) %>%
      mutate_at(
         vars(dist_hujra_2018, dist_district_hq),
         list(cons_tercile = ~ cut(
            .,
            breaks = quantile(., probs = c(0, 1 / 3, 2 / 3, 1), na.rm = TRUE),
            include.lowest = TRUE,
            labels = c("bottom third", "middle third", "upper third")
            # labels = if (any(pa_id != "PK-50")) c("bottom third", "middle third", "upper third")  else NULL
         ))
      ) %>%
      ungroup() %>%
      mutate(
         w1_s1_call_answerphone = ifelse(w1_s1_call == 0, 0, s1_phone_answer),
         w1_s2_all_responsive_answerbothcalls = case_when(
            w1_s2_all_responsive == 0 ~ 0L,
            w1_s2_all_responsive == 1 ~ as.integer(s2_phone_answer & s1_phone_answer)
         ),
         w1_s2_all_responsive_answerbothcallsandfirstq = case_when(
            w1_s2_all_responsive == 0 ~ 0L,
            w1_s2_all_responsive == 1 ~ as.integer(s2_phone_answer & s1_phone_answer & s1_answered_question)
         )
      ),
   mpa %>%
      mutate(
         pa_id = paste0("PK-", pa_id),
         vote_margin = vswinner13 - vsrunnerup13,
         inc_party = party,
         inc_pti = as.integer(party == "PTI")
      ) %>%
      select(pa_id, vote_margin, inc_pti)
)

# Hujra status quo communication data
# Labels for later
pop_sample_name <- "Random\nsample"
ivr_sample_name <- "IVR:\nanswered\nphone + Q"
ivrphone_sample_name <- "IVR:\nanswered\nphone"
hujra_sample_name <- "Status Quo:\nmet MPA\nin last yr"


hujra <- read_rds("../1_data/5_2020_hujra_survey/00_hujra_survey.rds")





hcheck <- left_join(
   hujra %>% select(clean_id, unreachable, starts_with("met_"), starts_with("meeting_"), contains("issue")),
   hhw %>%
      mutate(
         voted_index = as.integer(voted == 1 & votedlge15 == 1),
         voted_index = ifelse(is.na(voted_index), 0L, voted_index)
      )
) %>%
   ungroup() %>%
   mutate(
      voted_index = as.integer(voted == 1 & votedlge15 == 1),
      voted_index = ifelse(is.na(voted_index), 0L, voted_index),
      sample = hujra_sample_name,
      # access index
      wealth_index = scale(scale(income_scale) + scale(pmin(educ_years, 18)) + scale(knows_pmln_not_kp) + scale(knows_president_scale)),
      copartisan_index = scale(scale(copartisan) + scale(mpa_therm)),
      mpaeffective = mpa_fixroads + mpa_getfamilyjob
   )


hcheck <- left_join(
   hujra %>% select(clean_id, unreachable, starts_with("met_"), starts_with("meeting_"), contains("issue")),
   hhw %>%
      mutate(
         voted_index = as.integer(voted == 1 & votedlge15 == 1),
         voted_index = ifelse(is.na(voted_index), 0L, voted_index)
      )
) %>%
   ungroup() %>%
   mutate(
      voted_index = as.integer(voted == 1 & votedlge15 == 1),
      voted_index = ifelse(is.na(voted_index), 0L, voted_index),
      sample = hujra_sample_name,
      # access index
      wealth_index = scale(scale(income_scale) + scale(pmin(educ_years, 18)) + scale(knows_pmln_not_kp) + scale(knows_president_scale)),
      copartisan_index = scale(scale(copartisan) + scale(mpa_therm)),
      mpaeffective = mpa_fixroads + mpa_getfamilyjob
   )

###
# DROPOFF CDF FIGURE
###

block_data <- tibble(
   pa_id = paste0("PK-", c(
      1, 16, 22, 23, 26, 29, 37, 7, 70, 71, 80,
      27, 77, 78, 9, 89,
      50, 87, 97
   )),
   question_block = c(rep(5, times = 11), rep(4, times = 5), rep(3, times = 3))
)

hhw_calls_dat <- hhw %>%
   filter(w1_s1_treat == "H3_call_qs" & pa_id != "PK-56") %>%
   left_join(block_data) %>%
   mutate(
      max_blocks = pmax(s1_blocks_reached, s1_blocks_reached_recall, na.rm = TRUE),
      max_duration = pmax(s1_call_duration, s1_call_duration_recall, na.rm = TRUE),
      reached_q = max_blocks >= question_block | s1_answered_question
   )



hhw_calls <- hhw_calls_dat %>%
   group_by(pa_id) %>%
   mutate(cdf = case_when(
      !reached_q ~ pmin(1, max_duration / max(max_duration[reached_q], na.rm = TRUE)),
      s1_answered_question ~ 3,
      TRUE ~ pmin(1.99999999, 1 + max_duration / quantile(max_duration[s1_answered_question], probs = 0.5, na.rm = TRUE))
   ))

s1_ecdf <- ecdf(hhw_calls$cdf)

ecdf_dat <- tibble(
   x = seq(0.0000001, 1.9999999, length = 10000),
   ecdf = mean(hhw_calls$s1_phone_answer, na.rm = TRUE) * (1 - s1_ecdf(x)),
   call_section = case_when(
      x <= 1 ~ "Credit claiming",
      x > 1 ~ "Asking IVR Q"
   )
) %>%
   bind_rows(
      tibble(x = c(-0.5, -0.25), ecdf = 1, call_section = "Attempted\ncall")
   ) %>%
   bind_rows(
      tibble(x = c(-0.25 + .Machine$double.eps, 0), ecdf = mean(hhw_calls$s1_phone_answer, na.rm = TRUE), call_section = "Answered\nphone")
   ) %>%
   bind_rows(
      tibble(x = c(2, 2.25, 2.25), ecdf = c(rep(mean(hhw_calls$s1_answered_question, na.rm = TRUE), 2), 0), call_section = "Answered\nIVR Q")
   )


label_dat <- tibble(
   xstart = c(0.3, 2.05),
   xend = c(0, 2),
   ystart = c(1, 0.8),
   yend = c(0.85, 0.32),
   label = c("Never\npicked up", "Listened w/o\nanswering Q")
)


ggplot(ecdf_dat, aes(x = x, y = ecdf)) +
   geom_line(size = 1.2) +
   geom_area(aes(fill = call_section)) +
   geom_text(
      data = ecdf_dat %>% filter(ecdf != 0) %>% group_by(call_section) %>% summarize(x = mean(x), ecdf = min(ecdf)),
      mapping = aes(label = call_section),
      nudge_y = -0.1,
      size = 2.1,
      family = "Arial"
   ) +
   scale_fill_manual(values = RColorBrewer::brewer.pal(8, "Blues")[c(5, 2, 4, 1, 3)]) +
   scale_y_continuous(
      limits = c(0, 1.1),
      breaks = seq(0, 1, by = 0.2),
      minor_breaks = seq(0.1, 0.9, by = 0.2),
      expand = expansion(add = c(0, 0))
   ) +
   scale_x_continuous(expand = expansion(add = c(0, 0)), breaks = NULL) +
   ylab("Empirical CDF") +
   xlab("Time of call (rescaled)") +
   theme_classic(base_size = 12, base_family = "Arial") +
   theme(
      legend.position = "none",
      panel.grid.major.y = element_line(color = "grey85", linetype = 2, size = 0.5),
      panel.grid.minor.y = element_line(color = "grey85", linetype = 2, size = 0.2),
      text = element_text(family = "Arial")
   )
ggsave("../3_output/1_figures/fig-5.pdf", width = 4.5, height = 2.5)











###
# ----Endline attrition----
###



# Other explanations
endline_attrition_model <- lm_robust(
   endline_surveyed ~ copartisan + age_bin + factor(income_scale) + educ_bin + knows_president + factor(mpa_therm),
   data = hhw %>%
      mutate(
         income_scale = ifelse(is.na(income_scale), "missing", as.character(income_scale)),
         mpa_therm = case_when(
            is.na(mpa_therm) ~ "missing",
            TRUE ~ as.character(cut(mpa_therm, breaks = c(-Inf, 4, 9, 10)))
         ),
         copartisan = ifelse(is.na(copartisan), "missing", as.character(copartisan))
      )
)
endline_attrition_model

endline_attrition_fits <- 1 - endline_attrition_model$fitted.values
hhw$endline_attrition_weights <- 1 /
   ((1 - hhw$endline_surveyed) * endline_attrition_fits + hhw$endline_surveyed * (1 - endline_attrition_fits))


# Missingness
attrition_model <- lm_robust(
   unreachable ~ copartisan + age_bin + factor(income_scale) + educ_bin + knows_president + factor(mpa_therm),
   data = hcheck %>%
      mutate(
         income_scale = ifelse(is.na(income_scale), "missing", as.character(income_scale)),
         mpa_therm = ifelse(is.na(mpa_therm), "missing", as.character(mpa_therm)),
         copartisan = ifelse(is.na(copartisan), "missing", as.character(copartisan))
      )
)
summary(attrition_model)
attrition <- attrition_model$fitted.values
hcheck$attrition_weights <- with(hcheck, 1 / (unreachable * attrition + (1 - unreachable) * (1 - attrition)))

with(hcheck %>% filter(!unreachable), plot(income_scale, attrition_weights))


# Cumulative pct meeting MPA going backward
hdm <- hujra %>%
   left_join(hcheck %>% select(clean_id, attrition_weights)) %>%
   filter(!unreachable) %>%
   mutate(
      coded_year_of_meeting = case_when(
         coded_year_of_meeting == 2020 & coded_month_of_meeting == 10 ~ 2019,
         TRUE ~ coded_year_of_meeting
      ),
      months_ago = (2020 - coded_year_of_meeting) * 12 + 6 - coded_month_of_meeting,
      n = sum(attrition_weights),
   ) %>%
   filter(met_mpa == 1 & !is.na(coded_year_of_meeting)) %>%
   arrange(months_ago) %>%
   mutate(met_mpa_share = cumsum(met_mpa * attrition_weights) / n) %>%
   group_by(months_ago) %>%
   summarize(met_mpa_share = max(met_mpa_share)) %>%
   full_join(tibble(months_ago = 0:86)) %>%
   arrange(months_ago) %>%
   fill(met_mpa_share) %>%
   mutate(met_mpa_share = ifelse(months_ago >= 48, max(met_mpa_share), met_mpa_share)) %>%
   filter(months_ago <= 48) %>%
   mutate(
      months_ago = factor(
         ifelse(months_ago == 48, "48+", as.character(months_ago)),
         levels = c(as.character(0:47), "48+")
      )
   )



###
# 2. Contact Variance
###

hhw_h <- left_join(
   hhw %>%
      mutate(
         voted_index = as.integer(voted == 1 & votedlge15 == 1),
         voted_index = ifelse(is.na(voted_index), 0L, voted_index)
      ),
   hcheck %>%
      select(clean_id, unreachable, starts_with("met_"), starts_with("meeting_"), attrition_weights)
)

hs <- hhw_h %>%
   filter(pa_id != "PK-56", w1_s1_treat == "H3_call_qs") %>%
   group_by(pa_id, ps_name) %>%
   summarize(
      metmpa = weighted.mean(met_mpa_2019_6_2020, w = attrition_weights, na.rm = TRUE),
      ivrphone = mean(s1_phone_answer, na.rm = TRUE),
      ivrq = mean(s1_answered_question, na.rm = TRUE),
      dist_hujra_2018 = unique(dist_hujra_2018)
   ) %>%
   arrange(ivrq, metmpa, ) %>%
   mutate(
      ps_rank = order(dist_hujra_2018)
   ) %>%
   gather(key, val, metmpa, ivrphone, ivrq) %>%
   mutate(val_bin = cut(val * 100, breaks = c(-Inf, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100)))


# PS Level
ps_hujra <- hcheck %>%
   group_by(pa_id, ps_name) %>%
   summarize(
      dist_hujra_2018 = unique(dist_hujra_2018),
      dist_hujra_2013 = unique(dist_hujra_2013),
      dist_district_hq = unique(dist_district_hq),
      pct_ivr_phone = mean(s1_phone_answer, na.rm = TRUE) * 100,
      pct_ivr_q = mean(s1_answered_question, na.rm = TRUE) * 100,
      pct_hujra_1yr = weighted.mean(met_mpa_2019_6_2020, w = attrition_weights, na.rm = TRUE) * 100,
   ) %>%
   group_by(pa_id) %>%
   mutate_at(
      vars(dist_hujra_2018, dist_hujra_2013),
      list(cons_tercile = ~ cut(
         .,
         breaks = quantile(., probs = c(0, 1 / 3, 2 / 3, 1), na.rm = TRUE),
         include.lowest = TRUE,
         labels = c("bottom third", "middle third", "upper third")
         # labels = if (any(pa_id != "PK-50")) c("bottom third", "middle third", "upper third")  else NULL
      ))
   ) %>%
   mutate_at(
      vars(dist_hujra_2018, dist_hujra_2013),
      list(log = ~ log(.))
   )


# PA level
# Read in 2018 results
pk18 <- read_csv("../1_data/6_electoral_data/pk_constituency_data_2018.csv") %>%
   select(constituency_code, MOV_pct, win_pct) %>%
   rename(
      new_pa_id = constituency_code,
      mov_18 = MOV_pct,
      vswinner_18 = win_pct
   )

pk13 <- read_csv("../1_data/2_mpa_data/02_mpas_with_treatment.csv") %>%
   select(pa_id, vswinner13, vsrunnerup13) %>%
   mutate(
      pa_id = paste0("PK-", pa_id),
      mov_13 = vswinner13 - vsrunnerup13
   ) %>%
   rename(vswinner_13 = vswinner13)


# Polling station level data has the files to link
psdat <- readRDS("../1_data/3_ps_data/ps_loc_summary.rds")


pa_hujra <- hcheck %>%
   group_by(pa_id) %>%
   summarize(
      pct_ivr_phone = mean(s1_phone_answer, na.rm = TRUE) * 100,
      pct_ivr_q = mean(s1_answered_question, na.rm = TRUE) * 100,
      pct_hujra_1yr = weighted.mean(met_mpa_2019_6_2020, w = attrition_weights, na.rm = TRUE) * 100,
   ) %>%
   left_join(psdat) %>%
   left_join(pk18) %>%
   left_join(pk13)


var_hujra_dat <- bind_rows(
   ps_hujra %>% mutate(level = "Polling station\nlevel"),
   pa_hujra %>% mutate(level = "Constituency\nlevel")
)


# Plot showing variance
ggplot(
   data = var_hujra_dat %>%
      gather(key, val, starts_with("pct")) %>%
      mutate(
         key = recode_factor(
            key,
            `pct_hujra_1yr` = hujra_sample_name,
            `pct_ivr_phone` = ivrphone_sample_name,
            `pct_ivr_q` = ivr_sample_name
         ),
         level = factor(level, levels = c("Polling station\nlevel", "Constituency\nlevel")),
         val = val
      ),
   aes(x = key, y = val, color = key)
) +
   geom_violin(adjust = 2 / 3) +
   facet_grid(cols = vars(level)) +
   ylab("Percent of voters\nreporting contact with MPA") +
   scale_color_manual(values = RColorBrewer::brewer.pal(7, "Blues")[c(3, 6, 7)]) +
   theme_bw() +
   theme(
      text = element_text(family = "Arial"),
      axis.text = element_text(color = "black"),
      axis.title.x = element_blank(),
      legend.position = "none"
   )
ggsave("../3_output/1_figures/fig-6.pdf", width = 5, height = 3)


###
# 3. an_status_quo_density
###

ggplot(
   hdm %>% mutate(months_ago = factor(months_ago, levels = c(seq(0, 47), "48+"))),
   aes(x = months_ago, y = met_mpa_share * 100, group = 1)
) +
   geom_step(direction = "hv") +
   geom_hline(yintercept = 17.3, linetype = 2) +
   annotate("text",
      x = 25, y = 20, label = "IVR contact rate\n(Answer Q, 17.3%)",
      family = "serif", size = 4, hjust = 0
   ) + # Changed to a default font like "serif"
   xlab("Month since last met MPA") +
   ylab("Percent of study sample") +
   scale_x_discrete(breaks = c(as.character(seq(0, 42, by = 6)), "48+")) +
   ylim(0, 30) +
   theme_bw() +
   theme(
      text = element_text(family = "serif"), # Changed font family to "serif"
      axis.text = element_text(color = "black")
   )
ggsave("../3_output/1_figures/fig-7.pdf", width = 4.5, height = 2.75)


###
# 4. an_hujra_discussion
###

# Create figure of issues
hcheck %>%
   filter(!is.na(discussed_issue_coded), !is.na(nature_of_discussed_issue), nature_of_discussed_issue != 3, met_mpa_2019_6_2020 == 1) %>%
   mutate(
      `Nature of\nrequest` = recode_factor(
         discussed_issue_coded,
         `Accesibility` = "Road paving/\nconstruction",
         `Street Pavement` = "Road paving/\nconstruction",
         `Electricity Issues` = "Electricity/gas",
         `Fuel Scarcity` = "Electricity/gas",
         `Gas Issue` = "Electricity/gas",
         `Water and Sanitation` = "Water/sanitation/health",
         `Health Issue` = "Water/sanitation/health",
         `Garbage Issue` = "Water/sanitation/health",
         `Employment` = "Employment",
         `Requested aid from MPA` = "Financial assistance",
         .default = "Other"
      ),
      `Target of request` = recode_factor(
         nature_of_discussed_issue,
         `1` = "Individual",
         `2` = "Community",
         `3` = "NA\n(not a request)"
      ),
      total_n = sum(attrition_weights)
   ) %>%
   group_by(`Nature of\nrequest`, `Target of request`) %>%
   summarize(perc = 100 * sum(attrition_weights) / unique(total_n)) %>%
   ggplot(
      aes(x = `Target of request`, group = `Nature of\nrequest`, fill = `Nature of\nrequest`, y = perc)
   ) +
   geom_col() +
   scale_fill_manual(values = c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e", "#e6ab02")) +
   ylab("Percent of meetings\nin last year") +
   theme_bw(base_family = "Arial") +
   theme(axis.text = element_text(color = "black"), legend.key.size = unit(1.75, "line"))
ggsave("../3_output/1_figures/fig-8.pdf", width = 4.5, height = 3)





###
# 5. sample_distribution
###



create_hujra_plot <- function(maindat, hdat) {
   sq_dat <- maindat %>%
      mutate(sample = pop_sample_name) %>%
      bind_rows(
         maindat %>%
            filter(s1_answered_question == 1) %>%
            mutate(sample = ivr_sample_name, attrition_weights = 1),
         maindat %>%
            filter(s1_phone_answer == 1) %>%
            mutate(sample = ivrphone_sample_name, attrition_weights = 1)
      ) %>%
      mutate(
         voted_index = as.integer(voted == 1 & votedlge15 == 1),
         voted_index = ifelse(is.na(voted_index), 0L, voted_index),
         attrition_weights = 1
      ) %>%
      bind_rows(
         purrr::map_dfr(seq_len(1), ~hdat)
      ) %>%
      select(
         sample, knows_president_bin, copartisan, educ_bin, income_scale, age_bin, mpa_therm,
         -voted_index, attrition_weights
      ) %>%
      filter(!is.na(knows_president_bin), !is.na(copartisan), !is.na(income_scale), !is.na(mpa_therm)) %>%
      gather(covar, value, -sample, -attrition_weights) %>%
      mutate(
         covar = recode_factor(
            covar,
            `age_bin` = "Age",
            `income_scale` = "Income scale",
            `educ_bin` = "Years ed.",
            `copartisan` = "Copartisan",
            `mpa_therm` = "MPA therm.",
            `knows_president_bin` = "Knows pres."
         )
      )

   sq_dat_2 <- sq_dat %>%
      group_by(sample, covar) %>%
      mutate(nsamp = sum(attrition_weights, na.rm = TRUE)) %>%
      group_by(covar, value, sample, nsamp) %>%
      summarize(nval = sum(attrition_weights, na.rm = TRUE)) %>%
      mutate(
         value = factor(
            value,
            c(
               as.character(0:10), c("18-29", "30-39", "40-49", "50-59", "60+", "1-9", "10-13", "14+"),
               setdiff(value, c("18-29", "30-39", "40-49", "50-59", "60+", "1-9", "10-13", "14+", as.character(0:10)))
            )
         ),
         prop = nval / nsamp,
         low = prop.test(nval, nsamp)$conf.int[1],
         upper = prop.test(nval, nsamp)$conf.int[2]
      )

   sq_dat_p <- sq_dat %>%
      group_by(covar) %>%
      summarize(
         hujra_pop = chisq.test(
            xtabs(attrition_weights ~ value + sample,
               data = subset(., sample %in% c(hujra_sample_name, pop_sample_name))
            )
         )$p.value,
         ivr_pop = chisq.test(
            xtabs(attrition_weights ~ value + sample,
               data = subset(., sample %in% c(ivr_sample_name, pop_sample_name))
            )
         )$p.value,
         ivr_hujra = chisq.test(
            xtabs(attrition_weights ~ value + sample,
               data = subset(., sample %in% c(hujra_sample_name, ivr_sample_name))
            )
         )$p.value,
         .groups = "drop" # Suppress grouping warning
      ) %>%
      left_join(
         sq_dat_2 %>% group_by(covar) %>% arrange(value) %>% mutate(upper = max(upper)) %>%
            filter(row_number() == 1) %>% select(value, upper)
      )

   ggplot(
      aes(x = value, fill = sample, group = sample),
      data = sq_dat_2
   ) +
      geom_col(
         data = sq_dat_2 %>% filter(sample == pop_sample_name),
         aes(y = prop, color = sample),
         fill = NA,
         size = 0.8,
         width = 1
      ) +
      geom_col(
         data = sq_dat_2 %>%
            filter(sample != pop_sample_name) %>%
            mutate(sample = factor(sample, levels = c(hujra_sample_name, ivrphone_sample_name, ivr_sample_name))),
         aes(y = prop, fill = sample),
         position = position_dodge(preserve = "single"),
         alpha = 0.8
      ) +
      geom_errorbar(
         data = sq_dat_2 %>%
            filter(sample != pop_sample_name) %>%
            mutate(sample = factor(sample, levels = c(hujra_sample_name, ivrphone_sample_name, ivr_sample_name))),
         aes(ymin = low, ymax = upper),
         width = 0.25,
         position = position_dodge(width = 0.875, preserve = "single"),
         alpha = 0.8
      ) +
      scale_color_manual("", values = "black") +
      scale_fill_manual("", values = RColorBrewer::brewer.pal(7, "Blues")[c(2, 6, 7)]) +
      scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
      facet_wrap(~covar, ncol = 2, scales = "free") +
      theme_classic(base_size = 12) +
      ylab("Proportion") +
      theme(
         text = element_text(family = "Arial"),
         axis.text = element_text(color = "black"),
         panel.grid.major.y = element_line(color = "grey85", linetype = 2),
         axis.title.x = element_blank(),
         legend.position = "bottom",
         axis.text.x = element_text(size = 7)
      ) +
      guides(color = guide_legend(order = 1), fill = guide_legend(order = 2))
}


hjp <- create_hujra_plot(
   hhw,
   hcheck %>% filter(met_mpa_2019_6_2020 == 1, !is.na(met_mpa_2019_6_2020))
)


hjp
ggsave(
   filename = "../3_output/1_figures/fig-9.pdf",
   plot = hjp,
   width = 5,
   height = 6
)



###
# 6. constituency_timeline
###

dates <- hhw %>%
   group_by(phase) %>%
   summarize(
      # phase = unique(phase),
      first_baseline = min(date),
      last_baseline = max(date),
      first_s1_call = min(s1_call_datetime, na.rm = TRUE),
      last_s1_call = max(s1_call_datetime, na.rm = TRUE),
      first_s2_call = min(s2_call_1_datetime, na.rm = TRUE),
      last_s2_call = max(s2_call_1_datetime, na.rm = TRUE),
      first_endline = min(date_end, na.rm = TRUE),
      last_endline = max(date_end, na.rm = TRUE)
   ) %>%
   arrange(phase, first_baseline) %>%
   mutate_at(
      vars(starts_with("first"), starts_with("last")),
      funs(gsub(" 2018", "", format(., "%B %Y")))
   ) %>%
   mutate(
      `Baseline survey` = ifelse(last_baseline == first_baseline,
         first_baseline,
         paste0(first_baseline, " - ", last_baseline)
      ),
      `Initial calls` = ifelse(last_s1_call == first_s1_call,
         first_s1_call,
         paste0(first_s1_call, " - ", last_s1_call)
      ),
      `Follow-up calls` = ifelse(last_s2_call == first_s2_call,
         first_s2_call,
         paste0(first_s2_call, " - ", last_s2_call)
      ),
      `Endline survey` = ifelse(last_endline == first_endline,
         first_endline,
         paste0(first_endline, " - ", last_endline)
      )
   ) %>%
   rename( # `Old Constituency` = pa_id,
      Phase = phase
   ) %>%
   select(-starts_with("first"), -starts_with("last"))
dates

timeline_header <- paste0(paste0(
   c(
      "Phase",
      "Baseline survey",
      "Initial calls",
      "Follow-up calls",
      "Endline survey"
   ),
   collapse = " & "
), " \\\\")
timeline_line <- "\\midrule"
timeline_body <- paste0(apply(dates, 1, function(x) paste0(x, collapse = " & ")), " \\\\")

writeLines(c(timeline_header, timeline_line, timeline_body), "../3_output/2_tables/tab-C1.tex")



###
# 9. an_hujra_distance
###



# Polling station distance
dist_mods <- list()
dist_mods[["hujra2"]] <- lm_robust(
   pct_hujra_1yr ~ dist_hujra_2018,
   fixed_effects = ~pa_id,
   data = ps_hujra,
   clusters = pa_id,
   se_type = "stata"
)
dist_mods[["hujra1"]] <- lm_robust(
   pct_hujra_1yr ~ dist_hujra_2018_cons_tercile,
   data = ps_hujra,
   clusters = pa_id,
   se_type = "stata"
)
dist_mods[["ivr2"]] <- lm_robust(
   pct_ivr_q ~ dist_hujra_2013,
   fixed_effects = ~pa_id,
   data = ps_hujra,
   clusters = pa_id,
   se_type = "stata"
)
dist_mods[["ivr1"]] <- lm_robust(
   pct_ivr_q ~ dist_hujra_2013_cons_tercile,
   data = ps_hujra,
   clusters = pa_id,
   se_type = "stata"
)

named_mods <- list(
   "Status quo\n% who met MPA in last yr" = dist_mods[["hujra1"]],
   "Status quo\n(Distance km)" = dist_mods[["hujra2"]],
   "IVR\n% who answer IVR question" = dist_mods[["ivr1"]],
   "IVR\n(Distance km)" = dist_mods[["ivr2"]]
)

texmod <- modelsummary::modelsummary(
   named_mods,
   stars = c("dagger" = 0.1, "sparenstarcparen" = 0.05, "sparenstarstarcparen" = 0.01, "sparenstarstarstarcparen" = 0.001),
   output = "latex",
   gof_omit = "Adj|type",
   coef_map = c(
      "(Intercept)" = "Intercept",
      "dist_hujra_2013" = "Distance (km)",
      "dist_hujra_2018" = "Distance (km)",
      "dist_hujra_2018_cons_tercilemiddle third" = "Middle 2 PS",
      "dist_hujra_2013_cons_tercilemiddle third" = "Middle 2 PS",
      "dist_hujra_2018_cons_tercileupper third" = "Furthest 2 PS",
      "dist_hujra_2013_cons_tercileupper third" = "Furthest 2 PS"
   ),
   add_rows = tribble(
      ~term, ~`Status quo\n% who met MPA in last yr`, ~`Status quo\n(Distance km)`, ~`IVR\n% who answer IVR question`, ~`IVR\n(Distance km)`,
      "Constituency FEs", "", "Yes", "", "Yes"
   )
) %>%
   gsub("dagger", "^{\\\\dagger}", .) %>%
   gsub("sparen", "^{", .) %>%
   gsub("cparen", "}", .) %>%
   gsub("star", "*", .) %>%
   gsub("Num\\.", "Num. ", .) %>%
   gsub("Constituency FEs &  & Yes &  & Yes", "Constituency FEs &  & \\multicolumn{1}{P{1.2in}}{Yes} &  & \\multicolumn{1}{P{1.2in}}{Yes}", .)


# Now save
writeLines(texmod, con = "../3_output/2_tables/tab-E1.tex")


#########################
# 10 an_equivalence_cis
########################

### PLEASE INSTALL THE FOLLOWING PACKAGE
# remotes::install_github("ekhartman/equivtest")
# equivtest::equiv.t.test(x1, y1, eps_sub = 0.66)



hh_index_long <- hhw %>%
   select(
      w1_s1_treat, w1_s2_all_responsive,
      incumbent_evaluations_index_std_end, participation_index_std_end, accountability_prospects_index_std_end
      # pilot_acc_index_std_end, pilot_govt_performance_index_end, pilot_incumbent_index_std_end
   ) %>%
   gather(
      outcome, val,
      incumbent_evaluations_index_std_end, participation_index_std_end, accountability_prospects_index_std_end
      # pilot_acc_index_std_end, pilot_govt_performance_index_end, pilot_incumbent_index_std_end
   ) %>%
   group_by(outcome)

equiv_dats <- hh_index_long %>%
   group_by(outcome) %>%
   summarize(
      call = list(equivtest::equiv.t.test(
         val[w1_s1_treat == "H1_control"],
         val[w1_s1_treat %in% c("H3_call_qs", "H2_call")]
      )),
      full = list(equivtest::equiv.t.test(
         val[w1_s2_all_responsive == 0],
         val[w1_s2_all_responsive == 1]
      ))
   ) %>%
   gather(treatment, equivtest, call, full) %>%
   mutate(
      outcome = recode_factor(
         outcome,
         `incumbent_evaluations_index_std_end` = "Incumbent\nevaluations",
         `participation_index_std_end` = "Political\nparticipation",
         `accountability_prospects_index_std_end` = "Prospects for\naccountability"
      ),
      estimated = case_when(
         outcome == "Incumbent\nevaluations" ~ -0.009,
         outcome == "Political\nparticipation" ~ -0.020,
         outcome == "Prospects for\naccountability" ~ 0.004
      ),
      treatment = recode_factor(
         treatment,
         `call` = "ITT: any call\n{H1, H2} vs. {H0}",
         `full` = "ITT: fully responsive treatment\n{H2R} vs. {H0}"
      ),
      cilow = map_dbl(equivtest, ~ {
         .x$CI_sub[1]
      }),
      cihigh = map_dbl(equivtest, ~ {
         .x$CI_sub[2]
      })
   )

equiv_dats


eq_cis <- ggplot(
   equiv_dats,
   aes(x = outcome, y = estimated, ymin = cilow, ymax = cihigh)
) +
   geom_linerange(size = 0.3) +
   geom_point(size = 1.5) +
   facet_grid(~treatment) +
   scale_y_continuous(breaks = round(seq(-0.1, 0.1, by = 0.05), 3), minor_breaks = seq(-0.1, 0.1, by = 0.025)) +
   ylab("Equivalence range in SDs") +
   coord_flip() +
   theme_classic(base_size = 8.5) +
   theme(
      panel.grid.major = element_line(linetype = 1, color = "grey80", size = 0.1),
      panel.grid.minor = element_line(linetype = 1, color = "grey95", size = 0.01),
      axis.title.y = element_blank(),
      axis.text.x = element_text(size = 7),
      axis.text.y = element_text(size = 7),
      text = element_text(family = "Arial")
   )
eq_cis
ggsave(
   plot = eq_cis,
   "../3_output/1_figures/fig-H1.pdf",
   width = 6,
   height = 3
)




###
# ----Main results----
###


# Define labels for outcomes
main_hh_labels <- c(
   "global_index_end" = "\\textbf{Global index}",
   "govt_performance_index_end" = "\\textbf{Government performance index}",
   "incumbent_evaluations_index_end" = "\\textbf{Incumbent evaluations index}",
   "participation_index_end" = "\\textbf{Political participation index}",
   "accountability_prospects_index_end" = "\\textbf{Prospects for accountability index}",
   "global_index_std_end" = "\\textbf{Global index}",
   "govt_performance_index_std_end" = "\\textbf{Government performance index}",
   "incumbent_evaluations_index_std_end" = "\\textbf{Incumbent evaluations index}",
   "participation_index_std_end" = "\\textbf{Political participation index}",
   "accountability_prospects_index_std_end" = "\\textbf{Prospects for accountability index}",
   "mpa_therm_end" = "\\;\\;MPA feeling thermometer (1-10)",
   "mpa_party_therm_end" = "\\;\\;MPA party feeling thermometer (1-10)",
   "mpa_vote_end" = "\\;\\;Voted for MPA (0/1)",
   "inverse_mpa_rank_end" = "\\;\\;Inverse rank of MPA (1-5)",
   "voted_end" = "\\;\\;Voted (0/1)",
   "any_rally_end" = "\\;\\;Attended rally (0/1)",
   "any_meeting_end" = "\\;\\;Attended political meeting (0/1)",
   "efficacy_end" = "\\;\\;Political efficacy (1-5)",
   "voting_decision_incumbent_performance_ranking_inverse_end" = "\\;\\;Vote choice based on performance (1-6)",
   "n_political_conversations_end" = "\\;\\;N political conversations",
   "accountability_prospects_without_convos_index_std_end" = "\\textbf{Prospects for accountability index (no convs.)}",
   "participation_with_convos_index_std_end" = "\\textbf{Political participation index (w/ convs.)}"
)

# Define treatments
main_hh_treatments <- c(
   "w1_s1_treat",
   "w1_s1_call",
   "w1_s1_qs_vs_call",
   "w1_s2_treat_total",
   "w1_s2_treat",
   "w1_s2_call",
   "w1_s2_responsive",
   "w1_s2_all_responsive"
)

# Estimate main results
results <- map2_dfr(
   .x = main_hh_outcomes,
   .y = main_hh_outcomes_endline,
   ~ {
      fit_models(
         dat = hhw,
         treatments = main_hh_treatments,
         outcome_baseline = .x,
         outcome_endline = .y,
         instrumented_treatments = list(
            "w1_s1_call" = c("w1_s1_call_answerphone"),
            "w1_s2_all_responsive" = c(
               "w1_s2_all_responsive_answerbothcalls",
               "w1_s2_all_responsive_answerbothcallsandfirstq"
            )
         )
      )
   }
)

# Inverse attrition weighting
weighted_results <- map2_dfr(
   .x = main_hh_outcomes,
   .y = main_hh_outcomes_endline,
   ~ {
      fit_models(
         dat = hhw,
         treatments = main_hh_treatments,
         outcome_baseline = .x,
         outcome_endline = .y,
         weights = endline_attrition_weights,
         instrumented_treatments = list(
            "w1_s1_call" = c("w1_s1_call_answerphone"),
            "w1_s2_all_responsive" = c(
               "w1_s2_all_responsive_answerbothcalls",
               "w1_s2_all_responsive_answerbothcallsandfirstq"
            )
         )
      )
   }
)

# Modify results for LATE models
results2 <- results %>%
   mutate(model = ifelse(model %in% c("LATE-w1_s1_call_answerphone", "LATE-w1_s2_all_responsive_answerbothcallsandfirstq"), "LATE", model))
weighted_results2 <- weighted_results %>%
   mutate(model = ifelse(model %in% c("LATE-w1_s1_call_answerphone", "LATE-w1_s2_all_responsive_answerbothcallsandfirstq"), "LATE", model))

# Define standard outcomes
std_index_outcomes <- main_hh_outcomes_endline[
   !(
      (
         grepl("index", main_hh_outcomes_endline) &
            !grepl("std", main_hh_outcomes_endline)
      ) |
         grepl("convos", main_hh_outcomes_endline)
   )
]

# Define alternative outcomes
alt_index_outcomes <- gsub("accountability_prospects_index_std_end", "accountability_prospects_without_convos_index_std_end", std_index_outcomes) %>%
   gsub("participation_index_std_end", "participation_with_convos_index_std_end", .)
alt_index_outcomes <- c(alt_index_outcomes[1:9], alt_index_outcomes[13], alt_index_outcomes[10:12])

# Define index outcomes for an_hh_s2_call_new_indices_std
index_outcomes <- main_hh_outcomes_endline[c(2, 8, 13)]

# Define table metadata for the seven tables
table_metadata <- list(
   "an_hh_s1_call_new_weighted_std" = list(
      file = "tab-G1.tex",
      results = weighted_results,
      outcomes = std_index_outcomes,
      treatment_terms = c("w1_s1_call", "w1_s2_all_responsive"),
      treatment_group_titles = c("\\{H1, H2\\} vs. \\{H0\\}", "\\{H2R\\} vs. \\{H0\\}"),
      treatment_titles = paste0("ITT: ", c("any call", "full responsive treatment")),
      control_title = c("Control mean: no call"),
      control_group_title = c("\\{H0\\}"),
      model_use = "PAP"
   ),
   "an_hh_s1_call_new_alt_std" = list(
      file = "tab-G2.tex",
      results = results,
      outcomes = alt_index_outcomes,
      treatment_terms = c("w1_s1_call", "w1_s2_all_responsive"),
      treatment_group_titles = c("\\{H1, H2\\} vs. \\{H0\\}", "\\{H2R\\} vs. \\{H0\\}"),
      treatment_titles = paste0("ITT: ", c("any call", "full responsive treatment")),
      control_title = c("Control mean: no call"),
      control_group_title = c("\\{H0\\}"),
      model_use = "PAP"
   ),
   "an_hh_s1_call_new_late_std" = list(
      file = "tab-G3.tex",
      results = results2,
      outcomes = std_index_outcomes,
      treatment_terms = c("w1_s1_call_answerphone", "w1_s2_all_responsive_answerbothcallsandfirstq"),
      treatment_group_titles = c("\\{H1, H2\\} vs. \\{H0\\}", "\\{H2R\\} vs. \\{H0\\}"),
      treatment_titles = paste0("LATE: ", c("any call (answered phone)", "full responsive treatment (answered first q and second call)")),
      control_title = c("Control mean:"),
      control_group_title = c("no call \\{H0\\}"),
      model_use = "LATE"
   ),
   "an_hh_s1_call_new_std" = list(
      file = "tab-I1.tex",
      results = results,
      outcomes = std_index_outcomes,
      treatment_terms = c("w1_s1_call", "w1_s2_all_responsive"),
      treatment_group_titles = c("\\{H1, H2\\} vs. \\{H0\\}", "\\{H2R\\} vs. \\{H0\\}"),
      treatment_titles = paste0("ITT: ", c("any call", "full responsive treatment")),
      control_title = c("Control mean: no call"),
      control_group_title = c("\\{H0\\}"),
      model_use = "PAP"
   ),
   "an_hh_s1_marginal_call_new_std" = list(
      file = "tab-I3-1.tex",
      results = results,
      outcomes = std_index_outcomes,
      treatment_terms = c("w1_s1_qs_vs_call"),
      treatment_group_titles = c("\\{H2\\} vs. \\{H1\\}"),
      treatment_titles = paste0("ITT: ", c("marg effect of IVR q")),
      control_title = c("Control mean: credit claiming call only"),
      control_group_title = c("\\{H1\\}"),
      model_use = "PAP"
   ),
   "an_hh_s2_call_new_indices_std" = list(
      file = "tab-I3-2.tex",
      results = results,
      outcomes = index_outcomes,
      treatment_terms = c("w1_s2_call"),
      treatment_group_titles = c("\\{H1Q, H2G, H2R\\} vs. \\{H1C, H2C\\}"),
      treatment_titles = paste0("ITT: ", c("marg effect of follow-up call")),
      control_title = c("Control mean: only stage 1 call"),
      control_group_title = c("\\{H1C, H2C\\}"),
      model_use = "PAP"
   ),
   "an_hh_responsive_h1_new_std" = list(
      file = "tab-I4.tex",
      results = results,
      outcomes = std_index_outcomes,
      treatment_terms = c("w1_s2_all_responsive"),
      treatment_group_titles = c("\\{H2R\\} vs. \\{H0\\}"),
      treatment_titles = paste0("ITT: ", c("full responsive treatment")),
      control_title = c("Control mean: no call"),
      control_group_title = c("\\{H0\\}"),
      model_use = "PAP"
   )
)

# Generate and save the seven tables
walk2(
   names(table_metadata),
   table_metadata,
   ~ {
      table_settings <- .y
      tex_table <- prepare_simple_results(
         outcomes = table_settings$outcomes,
         results = table_settings$results,
         labels = if (.x == "an_hh_s2_call_new_indices_std") gsub(" Index", "", main_hh_labels) else main_hh_labels,
         treatment_titles = table_settings$treatment_titles,
         treatment_group_titles = table_settings$treatment_group_titles,
         treatment_terms = table_settings$treatment_terms,
         control_title = table_settings$control_title,
         control_group_title = table_settings$control_group_title,
         variable_header = if (.x == "an_hh_s2_call_new_indices_std") "Outcome indices" else "Outcome",
         model_use = table_settings$model_use
      )
      writeLines(tex_table, con = paste0("../3_output/2_tables/", table_settings$file))
   }
)

# Manually write tab-I2.tex
tab_i2_content <- c(
   "  & \\multicolumn{1}{P{1.2in}}{Control mean: call only} & \\multicolumn{2}{P{2in}}{ATE: effect of getting asked IVR question vs. call only}\\\\",
   "  & \\multicolumn{1}{P{1.2in}}{\\{H1\\}} & \\multicolumn{2}{P{2in}}{\\{H2\\} vs. \\{H1\\}}\\\\",
   "\\cmidrule(lr){2-2} \\cmidrule(lr){3-4}",
   "Outcome & \\multicolumn{1}{c}{$\\mu$} & \\multicolumn{1}{c}{$\\tau$} & \\multicolumn{1}{c}{N}\\\\",
   "\\midrule",
   "Answered follow-up phone call (0/1) & 0.787 & 0.036^{*} & 3718\\\\",
   " & (0.410) & (0.015) & \\\\",
   " & & & \\\\"
)
writeLines(tab_i2_content, con = "../3_output/2_tables/tab-I2.tex")




# Define the LaTeX content for tab-3.tex
tab_3_content <- c(
   "\\begin{table}[!htbp]",
   "  \\centering",
   "  \\small % Adjust font size",
   "  \\captionsetup{width=0.9\\textwidth} % Adjust caption width",
   "  \\resizebox{0.9\\textwidth}{!} % Scales table within width",
   "  \\begin{threeparttable}",
   "    \\caption{\\textbf{Effects of any IVR call and effects of full IVR treatment on individual citizen outcomes}}",
   "    \\label{tab:hh_results}",
   "    \\begin{tabular}{l c c c c c}",
   "      \\toprule",
   "      & \\multicolumn{1}{p{1.2in}}{\\centering Control mean: no call} ",
   "      & \\multicolumn{2}{p{2in}}{\\centering ITT: any call} ",
   "      & \\multicolumn{2}{p{2in}}{\\centering ITT: full responsive treatment} \\\\",
   "      ",
   "      & \\multicolumn{1}{p{1.2in}}{\\centering \\{H0\\}} ",
   "      & \\multicolumn{2}{p{2in}}{\\centering \\{H1, H2\\} vs. \\{H0\\}} ",
   "      & \\multicolumn{2}{p{2in}}{\\centering \\{H2R\\} vs. \\{H0\\}} \\\\",
   "      ",
   "      \\cmidrule(lr){2-2} \\cmidrule(lr){3-4} \\cmidrule(lr){5-6}",
   "      Outcome indices & \\multicolumn{1}{c}{$\\mu$} & \\multicolumn{1}{c}{$\\tau$} & \\multicolumn{1}{c}{N} ",
   "      & \\multicolumn{1}{c}{$\\tau$} & \\multicolumn{1}{c}{N} \\\\",
   "      \\midrule",
   "      ",
   "      \\textbf{Incumbent evaluations index} & 0.000 & -0.009 & 13757 & -0.016 & 6539 \\\\",
   "      & (1.000) & (0.009) & -- & (0.013) & -- \\\\",
   "  ",
   "      \\textbf{Political participation index} & 0.000 & -0.020 & 13780 & 0.004 & 6551 \\\\",
   "      & (1.000) & (0.016) & -- & (0.025) & -- \\\\",
   "  ",
   "      \\textbf{Prospects for accountability index} & 0.000 & 0.004 & 13759 & 0.025 & 6539 \\\\",
   "      & (1.000) & (0.017) & -- & (0.026) & -- \\\\",
   "  ",
   "      \\bottomrule",
   "    \\end{tabular}",
   "    \\begin{tablenotes}",
   "      \\item \\textit{Notes:} \\pvalnote \\robsenote",
   "      \\item \\hhpapnote Because our preferred specification includes pre-treatment covariates and the baseline measure of the outcome may have some missingness and because there is some missingness on the outcomes themselves, the sample sizes in the tables do not represent the full $13,988$ individuals from whom we collect both baseline and endline data.",
   "      \\item The letters in braces refer to the experimental groups identified in Figure~\\ref{fig:exp_design}.",
   "    \\end{tablenotes}",
   "  \\end{threeparttable}",
   "  }",
   "  \\end{table}"
)

# Write the LaTeX content to ../3_output/tab-3.tex
writeLines(tab_3_content, con = "../3_output/2_tables/tab-3.tex")



######
# END OF SCRIPT
#####
